python字符串加法 | 您所在的位置:网站首页 › python 字符串加减 › python字符串加法 |
从Python3开始,字符串统一使用Unicode编码 1.字符串加法 #源码 a="dddddddddd" b="sssssssssss" a = a+b #字节码 0 LOAD_NAME 0 (a) 2 LOAD_NAME 1 (b) 4 BINARY_ADD 6 STORE_NAME 0 (a) 8 LOAD_CONST 0 (None) 10 RETURN_VALUE 【BINARY_ADD】#源码有删减 PyObject *right = POP();#出栈right PyObject *left = TOP();#指向栈顶left PyObject *sum;#新对象sum #针对字符串加法运算的优化 if (PyUnicode_CheckExact(left) &&PyUnicode_CheckExact(right)) { sum = unicode_concatenate(tstate, left, right, f, next_instr); } else { ''' } SET_TOP(sum);#重新设置栈顶 //源码有删减 static PyObject * unicode_concatenate(PyThreadState *tstate, PyObject *v, PyObject *w, PyFrameObject *f, const _Py_CODEUNIT *next_instr) { PyObject *res; if (Py_REFCNT(v) == 2) { int opcode, oparg;#字节码指令和指令参数 NEXTOPARG();#获取下一字节码指令 switch (opcode) {#根据下一字节码指令进行优化 case STORE_FAST: { '''' } case STORE_DEREF: { '''' } case STORE_NAME:#这是我们执行的情况 { PyObject *names = f->f_code->co_names;#获取code对象name元组 PyObject *name = GETITEM(names, oparg);#获取变量名 PyObject *locals = f->f_locals;#获取局部变量字典 if (locals && PyDict_CheckExact(locals)) { PyObject *w = PyDict_GetItemWithError(locals, name); #两种情况直接返回 if ((w == v && PyDict_DelItem(locals, name) != 0) || (w == NULL && _PyErr_Occurred(tstate))) { Py_DECREF(v); return NULL; } } break; } } } res = v; PyUnicode_Append(&res, w);#通常情况调用的函数 return res; } //源码有删减 void PyUnicode_Append(PyObject **p_left, PyObject *right) { PyObject *left, *res; Py_UCS4 maxchar, maxchar2; Py_ssize_t left_len, right_len, new_len; ''' ''' /* Shortcuts */ if (left == unicode_empty) { #左为''情况 Py_DECREF(left); Py_INCREF(right); *p_left = right; return; } if (right == unicode_empty) #右为''情况 return; #拼接后长度计算 left_len = PyUnicode_GET_LENGTH(left); right_len = PyUnicode_GET_LENGTH(right); if (left_len > PY_SSIZE_T_MAX - right_len) { #PY_SSIZE_T_MAX是Py_ssize_t的最大值,即((size_t)-1)>>1(32位平台为0x7FFFFFFF,64位平台为0x7FFFFFFFFFFFFFFF),拼接后长度超过它则报错 goto error; } new_len = left_len + right_len;#拼接后长度 if (unicode_modifiable(left) #左对象必须可以原地修改 && PyUnicode_CheckExact(right)#右类型检查 #左类型必须是和右类型相同的类型或先后兼容的类型(1:Py_UCS1,2:Py_UCS2,4:Py_UCS4) && PyUnicode_KIND(right) <= PyUnicode_KIND(left) #ascii += latin1 的情况不做原地扩容 && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) { #原地扩容left if (unicode_resize(p_left, new_len) != 0) goto error; #把right拷贝到left新分配的区域 _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); } else { 
#类型不兼容或有操作串是ascii码的情况 maxchar = PyUnicode_MAX_CHAR_VALUE(left); maxchar2 = PyUnicode_MAX_CHAR_VALUE(right); maxchar = Py_MAX(maxchar, maxchar2); #创建一个新对象 res = PyUnicode_New(new_len, maxchar); if (res == NULL) goto error; #先拷贝左边 _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); #再拷贝右边 _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); #左指向新对象了,以前的对象就被垃圾回收了 *p_left = res; } return; error: Py_CLEAR(*p_left); } //源码有删减 static int unicode_resize(PyObject **p_unicode, Py_ssize_t length) { PyObject *unicode; Py_ssize_t old_length; unicode = *p_unicode; #获取传入unicode长度 if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); else old_length = PyUnicode_GET_LENGTH(unicode); #不用处理 if (old_length == length) return 0; #长度为0,指向empty对象 if (length == 0) { _Py_INCREF_UNICODE_EMPTY(); if (!unicode_empty) return -1; Py_SETREF(*p_unicode, unicode_empty); return 0; } #不能更改的情况 if (!unicode_modifiable(unicode)) { #创建一个新unicode对象,并拷贝原字符串内容 PyObject *copy = resize_copy(unicode, length); if (copy == NULL) return -1; Py_SETREF(*p_unicode, copy);#让*p_unicode指向拷贝,并释放对原对象的引用 return 0; } #紧凑(compact)对象的情况 if (PyUnicode_IS_COMPACT(unicode)) { #通过PyObject_REALLOC(realloc)重新分配整个对象的内存 PyObject *new_unicode = resize_compact(unicode, length); if (new_unicode == NULL) return -1; *p_unicode = new_unicode; return 0; } #通过PyObject_REALLOC(realloc)重新分配数据缓冲区的内存 return resize_inplace(unicode, length); } //有删减 static PyObject* resize_compact(PyObject *unicode, Py_ssize_t length) { Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; int share_wstr; PyObject *new_unicode; char_size = PyUnicode_KIND(unicode); //ascii类型长度 if (PyUnicode_IS_ASCII(unicode)) struct_size = sizeof(PyASCIIObject); //unicode类型长度 else struct_size = sizeof(PyCompactUnicodeObject); share_wstr = _PyUnicode_SHARE_WSTR(unicode); //超过最大长度 if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); return NULL; } //新的大小 new_size = (struct_size + (length + 1) * char_size); if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { 
PyObject_DEL(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } //调用realloc函数分配 new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size); if (new_unicode == NULL) { _Py_NewReference(unicode); PyErr_NoMemory(); return NULL; } unicode = new_unicode; //设置length长度 _PyUnicode_LENGTH(unicode) = length; //wstr与data共享内存的情况 if (share_wstr) { _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = length; } //wstr另有独立内存的情况,释放之 else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; } #define PyUnicode_WRITE(kind, data, index, value) \ do { \ switch ((kind)) { \ case PyUnicode_1BYTE_KIND: { \ ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ break; \ } \ case PyUnicode_2BYTE_KIND: { \ ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ break; \ } \ default: { \ assert((kind) == PyUnicode_4BYTE_KIND); \ ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ } \ } \ } while (0) PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), length, 0); return unicode; } static int resize_inplace(PyObject *unicode, Py_ssize_t length) { wchar_t *wstr; Py_ssize_t new_size; //一般情况 if (PyUnicode_IS_READY(unicode)) { Py_ssize_t char_size; int share_wstr, share_utf8; void *data; data = _PyUnicode_DATA_ANY(unicode);//data char_size = PyUnicode_KIND(unicode);//size share_wstr = _PyUnicode_SHARE_WSTR(unicode);//wstr share_utf8 = _PyUnicode_SHARE_UTF8(unicode);//utf-8 //超过最大长度 if (length > (PY_SSIZE_T_MAX / char_size - 1)) { PyErr_NoMemory(); return -1; } //新的长度 new_size = (length + 1) * char_size; //utf8未与data共享却另有独立内存,释放之 if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } //调用realloc data = (PyObject *)PyObject_REALLOC(data, new_size); 
_PyUnicode_DATA_ANY(unicode) = data; //wstr类型 if (share_wstr) { _PyUnicode_WSTR(unicode) = data; _PyUnicode_WSTR_LENGTH(unicode) = length; } //utf8类型 if (share_utf8) { _PyUnicode_UTF8(unicode) = data; _PyUnicode_UTF8_LENGTH(unicode) = length; } //设置长度 _PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } } //长度超过最大长度 /* check for integer overflow */ if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) { PyErr_NoMemory(); return -1; } //重新分配wstr缓冲区,当成wstr类型处理 new_size = sizeof(wchar_t) * (length + 1); wstr = _PyUnicode_WSTR(unicode); wstr = PyObject_REALLOC(wstr, new_size); if (!wstr) { PyErr_NoMemory(); return -1; } _PyUnicode_WSTR(unicode) = wstr; _PyUnicode_WSTR(unicode)[length] = 0; _PyUnicode_WSTR_LENGTH(unicode) = length; return 0; } static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; //不是wstr类型 if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { PyObject *copy; //新建对象 copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); //长度 copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); //快速内存拷贝 _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); return copy; } //当成wstr类型处理 else { PyObject *w; w = (PyObject*)_PyUnicode_New(length); if (w == NULL) return NULL; copy_length = _PyUnicode_WSTR_LENGTH(unicode); copy_length = Py_MIN(copy_length, length); memcpy(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), copy_length * sizeof(wchar_t)); return w; } } 标签:PyUnicode,return,Python,Py,length,PyObject,探究,unicode,加法 来源: https://blog.csdn.net/qq_33913982/article/details/104757198 |
CopyRight 2018-2019 实验室设备网 版权所有 |